

/*------------------------------------------------------------------------------
	1. Housekeeping
------------------------------------------------------------------------------*/

	*Run housekeeping code
	qui do ".../replication_package/housekeeping.do"

	*Write log
	log using "$log_loc/table_2.log", text replace

	*Create Empty Tempfile to Store Regression Results
		tempfile table_2
		save `table_2', emptyok

/*------------------------------------------------------------------------------
	2. MEPS Covariance
------------------------------------------------------------------------------*/
	*Read in Dataset
		use "$data/h192.dta", clear
		
		*Check id (each observation is a person in a home)
			isid duid pid
		
		/*Generate Age variable (from Month (dobmm) and Year (dobyy) of DOB as of
		12/31/2016. */
			*Generate DOB variable
				gen day = 1
				gen dob = mdy(dobmm, day, dobyy)
					format dob %td
					
			*Generate age_num variable
				personage_num dob, currdate(mdy(12,31,2016)) gen(age_num age_num_days)
				gen flag_child = age_num <= 16
		
		*Generate Variable for Has Parent that will claim
			/*(1) Confirm that the mom/dads are consistent across the dataset.*/
				assert (mopid31x == mopid42x | inlist(-1, mopid31x, mopid42x)) & (mopid42x == mopid53x | inlist(-1, mopid42x,mopid53x)) & (mopid31x == mopid53x | inlist(-1, mopid31x,mopid53x))
				assert (dapid31x == dapid42x | inlist(-1, dapid31x,dapid42x)) & (dapid42x == dapid53x | inlist(-1, dapid42x,dapid53x)) & (dapid31x == dapid53x | inlist(-1, dapid31x,dapid53x)) | age_num > 16
				
			/*(2) Generate ids for mom and dad (as the maximum of rows). This works
			  as mom/dad flags are either the same (& >= 101) or -1 */
				gen mom_id = max(mopid31x, mopid42x, mopid53x) if  flag_child == 1
				gen dad_id = max(dapid31x, dapid42x, dapid53x) if flag_child == 1
				
			*(3) Create a joint variable that will group the parent and their child
				gen child_mother_id= cond(flag_child, mom_id, pid)
				gen child_father_id= cond(flag_child, dad_id, pid)
				
			/*(4) Identify if the child is claimed by their parent*/	
				bysort duid child_mother_id (flag_child): gen flag_claimed_mom = clmdep16[1] if child_mother_id != -1 & mom_id == child_mother_id
				bysort duid child_father_id (flag_child): gen flag_claimed_dad = clmdep16[1] if child_father_id != -1 & dad_id == child_father_id
			
			*Replace flag_claimed = 1 if mom or dad claims child
				gen flag_claimed = (flag_claimed_mom == 1) | (flag_claimed_dad == 1)
				
		*Generate Insurance Variable
			gen has_ins = inscov16 != 3
			
		*Estimate covariance
			*Set Survey settings
				svyset [pweight=perwt16f], strata(varstr) psu(varpsu) 
					
			*Get standard deviation for claiming and insurance
				svy: mean has_ins, subpop(flag_child)
					estat sd
					local sd_ins = r(sd)[1,1]

				svy: mean flag_claimed, subpop(flag_child)
					estat sd	
					local sd_claim = r(sd)[1,1]
					
			* Estimate correlation			
				corr_svy has_ins flag_claimed [pweight=perwt16f], strata(varstr) psu(varpsu) subpop(flag_child) 
					local corr = r(rho)
					
			*Estimate covariance
				local covariance = `corr' * `sd_ins' * `sd_claim'
					di "The covariance is `covariance'"			

/*------------------------------------------------------------------------------
	3. Clean Census Datasets
------------------------------------------------------------------------------*/

	*Import Census Datasets
		import excel using "$data/Census Count/nc-est2019-syasexn.xlsx", clear cellra(A6:M22)
			keep A K L M
			ren (A K L M) (age_num census_2017 census_2018 census_2019)

		tempfile census_data
		save `census_data', replace

		import excel using "$data/Census Count/nc-est2022-syasexn.xlsx", clear cellra(A6:E22)
			keep A C D
			ren (A C D ) (age_num census_2020 census_2021)

			merge 1:1 age_num using `census_data'
				assert _merge == 3
				drop _merge
	
			collapse (sum) census_*
			gen id = _n

	*Make the Counts Per Year
		reshape long census_, i(id) j(year)
			ren census_ census_ct
			drop id 
			isid year

		tempfile census_dta
		save `census_dta', replace


/*------------------------------------------------------------------------------
	4. Clean Main Datasets
------------------------------------------------------------------------------*/

	forvalues year = 2017/2021 {

		*Import Data
		use "$data/childrens`year'.dta", clear
	
			preserve
				*Keep f1040 filings
					rename state_file state
					fmerge m:1 state using "$data/statelist.dta"
						assert _merge != 2 //Confirm that all 51 states are in dataset
						drop if _merge != 3

				*Keep if age_num is within the range
					keep if age_num >= 0 & age_num <= 16

				*Keep if kid died after end of year or is currently alive
					keep if `year'*10000+1231<death_date|death_date==0

				*Because we are only using f1040 filers, do not need to drop those with impossible 1095s

			/*Generate variables for table*/	
				*Generate outcome variable on claiming
					gen onreturn_naive = file_inc != .
	
				*Generate variable for year
					gen year = `year'

			/*Prepare backup data*/
				*Collapse to the by-age_num level		
					collapse (sum) onreturn_naive, by(year)
			
			tempfile col_naive_est
			save `col_naive_est', replace
			restore

			preserve
				*Keep f1095 and f1040 filings.
					gen state = state_insurance
					replace state = state_file if state == ""

					fmerge m:1 state using "$data/statelist.dta"
						assert _merge != 2 //Confirm that all 51 states are in dataset
						drop if _merge != 3

				*Keep if age_num is within the range
					keep if age_num >= 0 & age_num <= 16

				*Keep if kid died after end of year or is currently alive
					keep if `year'*10000+1231<death_date|death_date==0

				*drop if months covered is impossible (only for filings for which we are using the 1095 variable. 
					drop if (num_cov_ins <= 0 | num_cov_ins > 12) & state_insurance != "" & state_file == ""

			/*Generate variables for table*/	
				*Generate outcome variable
					gen claim_has_eic = has_eic == "E"
					gen onreturn_w_ben = ((has_ctc > 0 & has_ctc != .) | (ref_has_ctc > 0 & ref_has_ctc != .) | (claim_has_eic > 0 & claim_has_eic != .) | (state_insurance != "")) & file_inc != .

	
				*Generate variable for year
					gen year = `year'

			/*Prepare backup data*/
				*Collapse to the by-age_num level		
					collapse (sum) onreturn_w_ben, by(year)

			tempfile col_ben_est
			save `col_ben_est', replace

			restore

			/*Restrict Sample*/
				*Keep observations in the 51 states (per the f1095 variable). Note that most of the dropped state values are missing.
					rename state_insurance state
					fmerge m:1 state using "$data/statelist.dta"
						assert _merge != 2 //Confirm that all 51 states are in dataset
						drop if _merge != 3

				*Keep if age_num is within the range
					keep if age_num >= 0 & age_num <= 16

				*Keep if kid died after end of year or is currently alive
					keep if `year'*10000+1231<death_date|death_date==0

				*Keep if months covered is possible
					keep if num_cov_ins > 0 & num_cov_ins <= 12
	
			/*Generate variables for table*/
				*Generate outcome variable on claiming
					gen onreturn = file_inc != .

					gen months_unins = 12 - num_cov_ins

				*Generate a ct variable
					gen ct = 1

				*Generate variable for year
					gen year = `year'
			
/*------------------------------------------------------------------------------
	5. Create Information for Linear Extrapolation Sample
------------------------------------------------------------------------------*/
		/* Estimate the claiming rate for those without insurance */
			preserve
				*Run Regressions to get account for relationship between claiming and months uninsured
					collapse (sum) ct onreturn, by(months_unins)

				*Adjust for claiming caps on electronic claiming
					replace onreturn = onreturn * 1.0017029 

				*Generate percent claimed outcome variable
					gen perc_claimed = onreturn/ct

				*Run Main Regression
					reg perc_claimed months_unins [pw = ct]
						local unins_effect = _b[months_unins]
						local constant = _b[_cons]
						local y_bar_u = _b[_cons] + _b[months_unins] * 12
						di "Using monthly effect of `unins_effect' and a constant of `constant', the estimated claim rate for those uninsured for the full 12 months is `y_bar_u'"

			restore
	
/*------------------------------------------------------------------------------
	6. Carry Out Estimated Calculations
------------------------------------------------------------------------------*/
			/*Prepare backup data*/
				*Collapse to the by-age_num level		
					collapse (sum) ct onreturn, by(year)
			
				*Merge in Naive Estimate
					merge 1:1 year using `col_naive_est'
						assert _merge == 3
						drop _merge

				*Merge in Benefit Estimate
					merge 1:1 year using `col_ben_est'
						assert _merge == 3
						drop _merge

				*Merge in Census Data
					merge 1:1 year using `census_dta'			
						assert _merge != 1
						keep if _merge == 3

				*Adjust for Claiming Limits
					replace onreturn = onreturn * 1.0017029
					replace onreturn_naive = onreturn_naive * 1.0017029
					replace onreturn_w_ben = onreturn_w_ben * 1.0017029

				*Create Percent Claimed variables
					gen perc_claimed = onreturn/ct
					gen perc_claimed_imputed = perc_claimed * (ct/census_ct) + `y_bar_u' * (1 - ct/census_ct)
					gen perc_claimed_naive = onreturn_naive/census_ct
					gen perc_claimed_benefit = onreturn_w_ben/census_ct
					gen perc_claimed_meps = perc_claimed - (`covariance'/(ct/census_ct))

				*Keep key outcomes
					keep perc_claimed_imputed perc_claimed_naive perc_claimed_benefit perc_claimed_meps year

		*Save results as a tempfile
			tempfile results_`year'
			save `results_`year'', replace
	}


/*------------------------------------------------------------------------------
	7. Export the results as an Excel File
------------------------------------------------------------------------------*/
	*Import and append the results from 2017 to 2021
	forvalues year = 2017/2021 {
		use `results_`year'', clear		
		append using `table_2'

		tempfile table_2
		save `table_2', replace
	
	}
	
		*Organize before export
			sort year
			order year perc_claimed_meps perc_claimed_imputed perc_claimed_naive perc_claimed_benefit

	*Export Results as an Excel File		
		export excel using "$output/table_backup_data.xlsx", first(var) sheet("adj_table", replace)